## ---- reg-demographics ----

library(sp)

load("replication_file_07_province_sp_list.RData")

region_sp_list <- list() 

region_sp_list[['2013']] <- 
  readOGR('secondary_data/istat_reg_shapefiles/Reg01012013_g', 
          'Reg01012013_g_WGS84')
region_sp_list[['2014']] <- 
  readOGR('secondary_data/istat_reg_shapefiles/Reg01012014_g', 
          'Reg01012014_g_WGS84')
region_sp_list[['2015']] <- 
  readOGR('secondary_data/istat_reg_shapefiles/Reg01012015_g', 
          'Reg01012015_g_WGS84')
region_sp_list[['2016']] <- 
  readOGR('secondary_data/istat_reg_shapefiles/Reg01012016_g', 
          'Reg01012016_g_WGS84')

for (y in 2013:2016) {
  this_dat <- province_sp_list[[as.character(y)]]@data
  this_dat <- 
    this_dat %>%
    dplyr::group_by(COD_REG) %>%
    dplyr::summarise(population = sum(population),
                     population_over_65 = sum(population_over_65),
                     foreignpop = sum(foreignpop),
                     foreignpop_africa = sum(foreignpop_africa),
                     n_meetup_events = sum(n_meetup_events))
  region_sp_list[[as.character(y)]]@data$population <- 
    this_dat$population[
      match(region_sp_list[[as.character(y)]]@data$COD_REG, this_dat$COD_REG)]
  region_sp_list[[as.character(y)]]@data$population_over_65 <- 
    this_dat$population_over_65[
      match(region_sp_list[[as.character(y)]]@data$COD_REG, this_dat$COD_REG)]
  region_sp_list[[as.character(y)]]@data$foreignpop <- 
    this_dat$foreignpop[
      match(region_sp_list[[as.character(y)]]@data$COD_REG, this_dat$COD_REG)]
  region_sp_list[[as.character(y)]]@data$foreignpop_africa <- 
    this_dat$foreignpop_africa[
      match(region_sp_list[[as.character(y)]]@data$COD_REG, this_dat$COD_REG)]
  region_sp_list[[as.character(y)]]@data$n_meetup_events <- 
    this_dat$n_meetup_events[
      match(region_sp_list[[as.character(y)]]@data$COD_REG, this_dat$COD_REG)]
}

## Load income
istat_reg_income <- 
  read_csv("secondary_data/DCCN_ISTITUZ_TNA_09072018030940580.csv")
istat_reg_income <- 
  istat_reg_income[
    istat_reg_income$`Tipo aggregato`=='reddito disponibile netto' &
      istat_reg_income$`Settore istituzionale` == 'famiglie',]
istat_reg_income <- 
  istat_reg_income[istat_reg_income$Territorio %in% 
                     c(as.character(region_sp_list[[1]]@data$DEN_REG), 
                       "Valle d'Aosta / Vallée d'Aoste", 
                       "Provincia Autonoma Bolzano / Bozen", 
                       "Provincia Autonoma Trento"),]
istat_reg_income$Territorio <- gsub(" / (.*)$", "", istat_reg_income$Territorio)
istat_reg_income$DEN_REG <- istat_reg_income$Territorio
istat_reg_income$DEN_REG[grepl("Bolzano|Trento", istat_reg_income$DEN_REG)] <- 
  'Trentino-Alto Adige/Südtirol'
istat_reg_income$DEN_REG[grepl("Aosta", istat_reg_income$DEN_REG)] <- 
  "Valle d'Aosta/Vallée d'Aoste"
istat_reg_income <-
  istat_reg_income %>%
  dplyr::group_by(DEN_REG, TIME) %>%
  dplyr::summarise(family_income = mean(Value))

## Load unemployment rate
istat_reg_unemployment <- 
  read_csv("secondary_data/DCCV_TAXDISOCCU1_09072018033752528.csv")
istat_reg_unemployment <-
  istat_reg_unemployment[istat_reg_unemployment$Sesso == 'totale' & 
                           istat_reg_unemployment$ETA1 == 'Y_GE15' &
                           istat_reg_unemployment$`Titolo di studio` == 'totale' & 
                           istat_reg_unemployment$TIME %in% 2013:2016,]
istat_reg_unemployment$Territorio <- gsub(" / ", "/", istat_reg_unemployment$Territorio)
istat_reg_unemployment$Territorio[
  istat_reg_unemployment$Territorio == 'Trentino Alto Adige/Südtirol'] <- 
  'Trentino-Alto Adige/Südtirol'
istat_reg_unemployment <- 
  istat_reg_unemployment[
    istat_reg_unemployment$Territorio %in% as.character(region_sp_list[[1]]@data$DEN_REG),]

## Load occupation rate
istat_reg_employment <- 
  read_csv("secondary_data/DCCV_TAXOCCU1_09072018035105009.csv")
istat_reg_employment <-
  istat_reg_employment[istat_reg_employment$Sesso == 'totale' & 
                         istat_reg_employment$ETA1 == 'Y_GE15' &
                         istat_reg_employment$`Titolo di studio` == 'totale' & 
                         istat_reg_employment$TIME %in% 2013:2016,]
istat_reg_employment$Territorio <- gsub(" / ", "/", istat_reg_employment$Territorio)
istat_reg_employment$Territorio[
  istat_reg_employment$Territorio == 'Trentino Alto Adige/Südtirol'] <- 
  'Trentino-Alto Adige/Südtirol'
istat_reg_employment <- 
  istat_reg_employment[
    istat_reg_employment$Territorio %in% 
      as.character(region_sp_list[[1]]@data$DEN_REG),]

## Add
for (y in 2013:2016) {
  print(y)
  this_unemp <- subset(istat_reg_unemployment[istat_reg_unemployment$TIME == y,])
  this_emp <- subset(istat_reg_employment[istat_reg_employment$TIME == y,])
  this_income <- subset(istat_reg_income[istat_reg_income$TIME == y,])
  
  region_sp_list[[as.character(y)]]$unemployment <- 
    this_unemp$Value[
      match(region_sp_list[[as.character(y)]]$DEN_REG, this_unemp$Territorio)]
  region_sp_list[[as.character(y)]]$employment <-
    this_emp$Value[
      match(region_sp_list[[as.character(y)]]$DEN_REG, this_emp$Territorio)]
  region_sp_list[[as.character(y)]]$income <-
    this_income$family_income[
      match(region_sp_list[[as.character(y)]]$DEN_REG, this_income$DEN_REG)]
}

## Survey 2013
load("secondary_data/istat_aspetti_survey/AVQ_2013_IT_R/MICRODATI/DF_AVQ_A2013.RData")

DF_AVQ_A2013$reg[DF_AVQ_A2013$reg %in% c(41,42)] <- 300

DF_AVQ_A2013$coemicro <- 
  as.numeric(as.character(DF_AVQ_A2013$coemicro))

# Correlation local / national
with(DF_AVQ_A2013, cor.test(puntifi4 + puntifi1, puntifi8 + puntifi9 + puntifi10))
DF_AVQ_A2013$pol_trust_diff <- 
  with(DF_AVQ_A2013, (puntifi1 + puntifi4) - (puntifi8 + puntifi9 + puntifi10))

with(DF_AVQ_A2013, cor.test((puntifi8 + puntifi9 + puntifi10), volon))

survey2013 <-
  DF_AVQ_A2013 %>%
  dplyr::group_by(reg = sprintf("%03d", reg)) %>%
  dplyr::summarize(
    
    ppapo_mean = weighted.mean(ppapo==2, coemicro, na.rm = T),
    psind_mean = weighted.mean(psind==4, coemicro, na.rm = T),
    pgrvo_mean = weighted.mean(pgrvo==6, coemicro, na.rm = T),
    paeco_mean = weighted.mean(paeco==2, coemicro, na.rm = T),
    
    puntifi1_mean = weighted.mean(puntifi1, coemicro, na.rm = T),
    puntifi2_mean = weighted.mean(puntifi2, coemicro, na.rm = T),
    puntifi3_mean = weighted.mean(puntifi3, coemicro, na.rm = T),
    puntifi4_mean = weighted.mean(puntifi4, coemicro, na.rm = T),
    puntifi5_mean = weighted.mean(puntifi5, coemicro, na.rm = T),
    puntifi8_mean = weighted.mean(puntifi8, coemicro, na.rm = T),
    puntifi9_mean = weighted.mean(puntifi9, coemicro, na.rm = T),
    puntifi10_mean = weighted.mean(puntifi10, coemicro, na.rm = T),
    
    pol_trust_diff_mean = weighted.mean(pol_trust_diff, coemicro, na.rm = T),
    
    comiz_mean = weighted.mean(comiz, coemicro, na.rm = T),
    cortei_mean = weighted.mean(cortei, coemicro, na.rm = T),
    dibpo_mean = weighted.mean(dibpo, coemicro, na.rm = T),
    
    volon_mean = weighted.mean(volon==4, coemicro, na.rm = T),
    degree_perc = weighted.mean(istr < 3, coemicro, na.rm = T),
    survey_n = n()
    
  )

region_labels_2013 <- 
  c("Piemonte", "Valle d'Aosta/Vallée d'Aoste", 
    "Lombardia", "Bolzano", "Trento", "Veneto", 
    "Friuli-Venezia Giulia", "Liguria", "Emilia-Romagna", 
    "Toscana", "Umbria", "Marche", 
    "Lazio", "Abruzzo", "Molise", "Campania", "Puglia", 
    "Basilicata", "Calabria", "Sicilia", 
    "Sardegna", "Trentino-Alto Adige/Südtirol")
names(region_labels_2013) <- 
  c("010", "020", "030", "041", "042", "050", "060", "070", 
    "080", "090", "100", "110", "120", "130", 
    "140", "150", "160", "170", "180", "190", "200", "300")

survey2013$DEN_REG <-  region_labels_2013[match(survey2013$reg, names(region_labels_2013))]
survey2013$reg <- NULL

region_sp_list[['2013']] <- merge(region_sp_list[['2013']], survey2013, by = "DEN_REG")

## Survey 2014
load("secondary_data/istat_aspetti_survey/AVQ_2014_IT_R/MICRODATI/DF_AVQ_A2014.RData")

DF_AVQ_A2014 <- DF_AVQ_A2014[DF_AVQ_A2014$REGMif != 999,]

names(DF_AVQ_A2014) <- tolower(names(DF_AVQ_A2014))

DF_AVQ_A2014$coemicro <- as.numeric(as.character(DF_AVQ_A2014$coefin))

DF_AVQ_A2014$pol_trust_diff <- 
  with(DF_AVQ_A2014, (puntifi1 + puntifi4) - (puntifi8 + puntifi9 + puntifi10))

survey2014 <-
  DF_AVQ_A2014 %>%
  dplyr::group_by(reg = sprintf("%03d", regmif)) %>%
  dplyr::summarize(
    
    ppapo_mean = weighted.mean(ppapo==2, coemicro, na.rm = T),
    psind_mean = weighted.mean(psind==4, coemicro, na.rm = T),
    pgrvo_mean = weighted.mean(pgrvo==6, coemicro, na.rm = T),
    paeco_mean = weighted.mean(paeco==2, coemicro, na.rm = T),
    
    puntifi1_mean = weighted.mean(puntifi1, coemicro, na.rm = T),
    puntifi2_mean = weighted.mean(puntifi2, coemicro, na.rm = T),
    puntifi3_mean = weighted.mean(puntifi3, coemicro, na.rm = T),
    puntifi4_mean = weighted.mean(puntifi4, coemicro, na.rm = T),
    puntifi5_mean = weighted.mean(puntifi5, coemicro, na.rm = T),
    puntifi8_mean = weighted.mean(puntifi8, coemicro, na.rm = T),
    puntifi9_mean = weighted.mean(puntifi9, coemicro, na.rm = T),
    puntifi10_mean = weighted.mean(puntifi10, coemicro, na.rm = T),
    
    pol_trust_diff_mean = weighted.mean(pol_trust_diff, coemicro, na.rm = T),
    
    comiz_mean = weighted.mean(comiz, coemicro, na.rm = T),
    cortei_mean = weighted.mean(cortei, coemicro, na.rm = T),
    dibpo_mean = weighted.mean(dibpo, coemicro, na.rm = T),
    
    volon_mean = weighted.mean(volon==4, coemicro, na.rm = T),
    degree_perc = weighted.mean(istrmi==1, coemicro, na.rm = T),
    survey_n = n()
    
  )

region_labels_2014 <- 
  c("Piemonte", "Valle d'Aosta/Vallée d'Aoste", "Lombardia", 
    "Trentino-Alto Adige/Südtirol", "Veneto", 
    "Friuli-Venezia Giulia", "Liguria", "Emilia-Romagna", 
    "Toscana", "Umbria", "Marche", "Lazio", 
    "Abruzzo", "Molise", "Campania", "Puglia", "Basilicata", 
    "Calabria", "Sicilia", "Sardegna", 
    "non disponibile")
names(region_labels_2014) <- 
  c("010", "020", "030", "040", "050", "060", "070", "080", "090", "100", "110", "120", "130", 
    "140", "150", "160", "170", "180", "190", "200", "999")

survey2014$DEN_REG <-  region_labels_2014[match(survey2014$reg, names(region_labels_2014))] 
survey2014$reg <- NULL

region_sp_list[['2014']] <- merge(region_sp_list[['2014']], survey2014, by = "DEN_REG")

## Survey 2015
load("secondary_data/istat_aspetti_survey/AVQ_2015_IT_R/MICRODATI/DF_AVQ_A2015.RData")

DF_AVQ_A2015 <- DF_AVQ_A2015[DF_AVQ_A2015$REGMif != 999,]

names(DF_AVQ_A2015) <- tolower(names(DF_AVQ_A2015))

DF_AVQ_A2015$coemicro <- as.numeric(as.character(DF_AVQ_A2015$coefin))

DF_AVQ_A2015$pol_trust_diff <- 
  with(DF_AVQ_A2015, (puntifi1 + puntifi4) - (puntifi8 + puntifi9 + puntifi10))

survey2015 <-
  DF_AVQ_A2015 %>%
  dplyr::group_by(reg = sprintf("%03d", regmif)) %>%
  dplyr::summarize(
    
    ppapo_mean = weighted.mean(ppapo==2, coemicro, na.rm = T),
    psind_mean = weighted.mean(psind==4, coemicro, na.rm = T),
    pgrvo_mean = weighted.mean(pgrvo==6, coemicro, na.rm = T),
    paeco_mean = weighted.mean(paeco==2, coemicro, na.rm = T),
    
    puntifi1_mean = weighted.mean(puntifi1, coemicro, na.rm = T),
    puntifi2_mean = weighted.mean(puntifi2, coemicro, na.rm = T),
    puntifi3_mean = weighted.mean(puntifi3, coemicro, na.rm = T),
    puntifi4_mean = weighted.mean(puntifi4, coemicro, na.rm = T),
    puntifi5_mean = weighted.mean(puntifi5, coemicro, na.rm = T),
    puntifi8_mean = weighted.mean(puntifi8, coemicro, na.rm = T),
    puntifi9_mean = weighted.mean(puntifi9, coemicro, na.rm = T),
    puntifi10_mean = weighted.mean(puntifi10, coemicro, na.rm = T),
    
    pol_trust_diff_mean = weighted.mean(pol_trust_diff, coemicro, na.rm = T),
    
    comiz_mean = weighted.mean(comiz, coemicro, na.rm = T),
    cortei_mean = weighted.mean(cortei, coemicro, na.rm = T),
    dibpo_mean = weighted.mean(dibpo, coemicro, na.rm = T),
    
    volon_mean = weighted.mean(volon==4, coemicro, na.rm = T),
    degree_perc = weighted.mean(istrmi==1, coemicro, na.rm = T),
    survey_n = n()
    
  )

region_labels_2015 <- 
  c("Piemonte", "Valle d'Aosta/Vallée d'Aoste", "Lombardia", 
    "Trentino-Alto Adige/Südtirol", "Veneto", 
    "Friuli-Venezia Giulia", "Liguria", "Emilia-Romagna", 
    "Toscana", "Umbria", "Marche", "Lazio", 
    "Abruzzo", "Molise", "Campania", "Puglia", "Basilicata", 
    "Calabria", "Sicilia", "Sardegna", 
    "non disponibile")
names(region_labels_2015) <- 
  c("010", "020", "030", "040", "050", "060", "070", "080", 
    "090", "100", "110", "120", "130", 
    "140", "150", "160", "170", "180", "190", "200", "999")

survey2015$DEN_REG <-  region_labels_2015[match(survey2015$reg, names(region_labels_2015))] 
survey2015$reg <- NULL

region_sp_list[['2015']] <- merge(region_sp_list[['2015']], survey2015, by = "DEN_REG")

## Survey 2016
load("secondary_data/istat_aspetti_survey/AVQ_2016_IT_R/MICRODATI/DF_AVQ_A2016.RData")

DF_AVQ_A2016 <- DF_AVQ_A2016[DF_AVQ_A2016$REGMiF != 999,]

names(DF_AVQ_A2016) <- tolower(names(DF_AVQ_A2016))

DF_AVQ_A2016$coemicro <- as.numeric(as.character(DF_AVQ_A2016$coefin))

DF_AVQ_A2016$pol_trust_diff <- 
  with(DF_AVQ_A2016, (puntifi1 + puntifi4) - (puntifi8 + puntifi9 + puntifi10))

survey2016 <-
  DF_AVQ_A2016 %>%
  dplyr::group_by(reg = sprintf("%03d", regmif)) %>%
  dplyr::summarize(
    
    ppapo_mean = weighted.mean(ppapo==2, coemicro, na.rm = T),
    psind_mean = weighted.mean(psind==4, coemicro, na.rm = T),
    pgrvo_mean = weighted.mean(pgrvo==6, coemicro, na.rm = T),
    paeco_mean = weighted.mean(paeco==2, coemicro, na.rm = T),
    
    puntifi1_mean = weighted.mean(puntifi1, coemicro, na.rm = T),
    puntifi2_mean = weighted.mean(puntifi2, coemicro, na.rm = T),
    puntifi3_mean = weighted.mean(puntifi3, coemicro, na.rm = T),
    puntifi4_mean = weighted.mean(puntifi4, coemicro, na.rm = T),
    puntifi5_mean = weighted.mean(puntifi5, coemicro, na.rm = T),
    puntifi8_mean = weighted.mean(puntifi8, coemicro, na.rm = T),
    puntifi9_mean = weighted.mean(puntifi9, coemicro, na.rm = T),
    puntifi10_mean = weighted.mean(puntifi10, coemicro, na.rm = T),
    
    pol_trust_diff_mean = weighted.mean(pol_trust_diff, coemicro, na.rm = T),
    
    comiz_mean = weighted.mean(comiz, coemicro, na.rm = T),
    cortei_mean = weighted.mean(cortei, coemicro, na.rm = T),
    dibpo_mean = weighted.mean(dibpo, coemicro, na.rm = T),
    
    volon_mean = weighted.mean(volon==4, coemicro, na.rm = T),
    degree_perc = weighted.mean(istrmi==1, coemicro, na.rm = T),
    survey_n = n()
    
  )

region_labels_2016 <- 
  c("Piemonte", "Valle d'Aosta/Vallée d'Aoste", "Lombardia", 
    "Trentino-Alto Adige/Südtirol", "Veneto", 
    "Friuli-Venezia Giulia", "Liguria", "Emilia-Romagna", "Toscana", 
    "Umbria", "Marche", "Lazio", 
    "Abruzzo", "Molise", "Campania", "Puglia", "Basilicata", "Calabria",
    "Sicilia", "Sardegna", 
    "non disponibile")

names(region_labels_2016) <- 
  c("010", "020", "030", "040", "050", "060", "070", "080", "090", 
    "100", "110", "120", "130", 
    "140", "150", "160", "170", "180", "190", "200", "999")

survey2016$DEN_REG <-  
  region_labels_2016[match(survey2016$reg, 
                           names(region_labels_2016))] 
survey2016$reg <- NULL

region_sp_list[['2016']] <-
  merge(region_sp_list[['2016']], survey2016, by = "DEN_REG")

save(region_sp_list, file = "replication_file_08_region_sp_list.RData")
